The CICA Windows Explosion!

home *** CD-ROM | disk | FTP | other *** search

/ The CICA Windows Explosion! / The CICA Windows Explosion! - Disc 2.iso / misc / gs261src.zip / iutilasm.asm < prev next >

Wrap

Assembly Source File | 1993-05-13 | 9KB | 459 lines

;    Copyright (C) 1989, 1992, 1993 Aladdin Enterprises.  All rights reserved.
;
; This file is part of Ghostscript.
;
; Ghostscript is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY.  No author or distributor accepts responsibility
; to anyone for the consequences of using it or for whether it serves any
; particular purpose or works at all, unless he says so in writing.  Refer
; to the Ghostscript General Public License for full details.
;
; Everyone is granted permission to copy, modify and redistribute
; Ghostscript, but only under the conditions described in the Ghostscript
; General Public License.  A copy of this license is supposed to have been
; given to you along with Ghostscript so you can know your rights and
; responsibilities.  It should be in a file named COPYING.  Among other
; things, the copyright notice and this notice must be preserved on all
; copies.

; iutilasm.asm
; Assembly code for Ghostscript interpreter on MS-DOS systems
;
; Syntax:  MASM (Intel operand order), 16-bit code segment.
; Contents:
;   LXMUL@ / F_LXMUL@                   32-bit multiply (FOR80386 only)
;   LDIV@, LUDIV@, LMOD@, LUMOD@        32-bit divide / remainder
;     (and their F_ aliases)            (FOR80386 and plain 8086 versions)
;   _memflip8x8                         transpose an 8x8 bit matrix
; All procedures are declared far.

        ifdef   FOR80386

        .286c

        endif

utilasm_TEXT    SEGMENT WORD PUBLIC 'CODE'
        ASSUME  CS:utilasm_TEXT


        ifdef   FOR80386

; Macro for 32-bit operand prefix.
; Emits the 66h prefix byte so that the NEXT 16-bit instruction operates
; on the corresponding 32-bit register(s).
OP32    macro
        db      66h
        endm

        endif                                   ; FOR80386


        ifdef   FOR80386

; Replace the multiply and divide routines in the Turbo C library
; if we are running on an 80386.

; Macro to swap the halves of a 32-bit register.
; Unfortunately, masm won't allow a shift instruction with a count of 16,
; so we have to code it in hex.
; regno is the register number (one of regax/regcx/regdx/regbx below).
swap    macro   regno
          OP32
        db      0c1h,0c0h+regno,16              ; rol regno,16
        endm
regax   equ     0
regcx   equ     1
regdx   equ     2
regbx   equ     3

;-----------------------------------------------------------------------
; LXMUL@ / F_LXMUL@
; Multiply (dx,ax) by (cx,bx) to (dx,ax).
; In:    dx:ax = first operand, cx:bx = second operand
; Out:   dx:ax = product as left by the 32-bit two-operand imul
; Alters: cx (overwritten with bx) and the upper halves of the 32-bit
;        ax, cx and dx registers; flags.
; Each operand is packed into one 32-bit register (high word rotated
; into the upper half, low word moved into the lower half), multiplied,
; and the result is split back into dx:ax.
;-----------------------------------------------------------------------
        PUBLIC  LXMUL@
        PUBLIC  F_LXMUL@
F_LXMUL@ proc   far
LXMUL@  proc    far
        swap    regdx                           ; upper half of 32-bit dx = hi word
        mov     dx,ax                           ; lower half = lo word
        swap    regcx                           ; same packing for (cx,bx)
        mov     cx,bx
          OP32
        db      0fh,0afh,0d1h                   ; imul dx,cx
          OP32
        mov     ax,dx                           ; lo word of result now in ax
        swap    regdx                           ; hi word of result now in dx
        ret
LXMUL@  endp
F_LXMUL@ endp


; Divide two stack operands, leave the result in (dx,ax).
;
; Stack layout on entry (far call, so 4 bytes of return address):
;   [sp+4] = dividend (32 bits), [sp+8] = divisor (32 bits).
; setup32 loads the dividend into the 32-bit ax register and leaves bx
; addressing the operands; ret32 returns and pops the operands.
; The DEBUG variants additionally build and tear down a bp frame.

        ifdef   DEBUG

setup32 macro
        mov     bx,sp                           ; taken before the push, so the
                                                ; operand offsets match !DEBUG
        push    bp
        mov     bp,sp
          OP32
        mov     ax,ss:[bx+4]                    ; dividend
        endm

ret32   macro   n
        mov     sp,bp
        pop     bp
        ret     n
        endm

        else                                    ; !DEBUG

setup32 macro
        mov     bx,sp
          OP32
        mov     ax,ss:[bx+4]                    ; dividend
        endm

ret32   macro   n
        ret     n
        endm

        endif                                   ; (!)DEBUG

        PUBLIC  LDIV@, LUDIV@, LMOD@, LUMOD@
        PUBLIC  F_LDIV@, F_LUDIV@, F_LMOD@, F_LUMOD@

;-----------------------------------------------------------------------
; LDIV@ / F_LDIV@ -- signed 32-bit quotient of the two stack operands.
; Out:   dx:ax = quotient.  The 8 bytes of operands are popped on return.
; Alters: bx and the upper halves of the 32-bit ax and dx registers; flags.
;-----------------------------------------------------------------------
F_LDIV@ proc    far
LDIV@   proc    far
        setup32
          OP32
        cwd                                     ; sign-extend the 32-bit dividend
          OP32
        idiv    word ptr ss:[bx+8]              ; divisor
          OP32
        mov     dx,ax                           ; copy quotient, then
        swap    regdx                           ; bring its hi word into dx
        ret32   8
LDIV@   endp
F_LDIV@ endp

;-----------------------------------------------------------------------
; LUDIV@ / F_LUDIV@ -- unsigned 32-bit quotient of the two stack operands.
; Out:   dx:ax = quotient.  The 8 bytes of operands are popped on return.
;-----------------------------------------------------------------------
F_LUDIV@ proc   far
LUDIV@  proc    far
        setup32
          OP32
        xor     dx,dx                           ; zero-extend the 32-bit dividend
          OP32
        div     word ptr ss:[bx+8]              ; divisor
          OP32
        mov     dx,ax                           ; copy quotient, then
        swap    regdx                           ; bring its hi word into dx
        ret32   8
LUDIV@  endp
F_LUDIV@ endp

;-----------------------------------------------------------------------
; LMOD@ / F_LMOD@ -- signed 32-bit remainder of the two stack operands.
; Out:   dx:ax = remainder.  The 8 bytes of operands are popped on return.
;-----------------------------------------------------------------------
F_LMOD@ proc    far
LMOD@   proc    far
        setup32
          OP32
        cwd                                     ; sign-extend the 32-bit dividend
          OP32
        idiv    word ptr ss:[bx+8]              ; divisor
          OP32
        mov     ax,dx                           ; remainder: lo word to ax, then
        swap    regdx                           ; bring its hi word into dx
        ret32   8
LMOD@   endp
F_LMOD@ endp

;-----------------------------------------------------------------------
; LUMOD@ / F_LUMOD@ -- unsigned 32-bit remainder of the two stack operands.
; Out:   dx:ax = remainder.  The 8 bytes of operands are popped on return.
;-----------------------------------------------------------------------
F_LUMOD@ proc   far
LUMOD@  proc    far
        setup32
          OP32
        xor     dx,dx                           ; zero-extend the 32-bit dividend
          OP32
        div     word ptr ss:[bx+8]              ; divisor
          OP32
        mov     ax,dx                           ; remainder: lo word to ax, then
        swap    regdx                           ; bring its hi word into dx
        ret32   8
LUMOD@  endp
F_LUMOD@ endp

        else                                    ; !FOR80386

; Replace the divide routines in the Turbo C library,
; which do the division one bit at a time (!).
;
; All four entry points share one unsigned divide core (label udiv inside
; LUDIV@).  The signed entries first replace negative operands by their
; absolute values IN PLACE on the stack, and record in bx which result is
; wanted and whether it must be negated.  There is no explicit check for
; a zero divisor.
; Out (all entries): dx:ax = result; the 8 bytes of operands are popped.
; Alters: ax, bx, cx, dx, flags.  bp is saved and restored.

        PUBLIC  LDIV@, LMOD@, LUDIV@, LUMOD@
        PUBLIC  F_LDIV@, F_LMOD@, F_LUDIV@, F_LUMOD@

; Negate a long on the stack.
negbp   macro   offset
        neg     word ptr [bp+offset+2]          ; high part
        neg     word ptr [bp+offset]            ; low part
        sbb     word ptr [bp+offset+2],0
        endm

; Negate a long in (dx,ax).
negr    macro
        neg     dx
        neg     ax
        sbb     dx,0
        endm

; Divide two unsigned longs on the stack.
; Leave either the quotient or the remainder in (dx,ax).
; Operand offsets assume that bp (and only bp) has been pushed.
nlo     equ     6
nhi     equ     8
dlo     equ     10
dhi     equ     12

; We use an offset in bx to distinguish div from mod,
; and to indicate whether the result should be negated.
; The values are byte offsets into the word jump tables xx1/xx1a/xx3/xxz.
odiv    equ     0
omod    equ     2
odivneg equ     4
omodneg equ     6

;-----------------------------------------------------------------------
; LMOD@ / F_LMOD@ -- signed remainder.
; The remainder is negated iff the numerator was negative.
;-----------------------------------------------------------------------
F_LMOD@ proc    far
LMOD@   proc    far
        push    bp
        mov     bp,sp
        mov     bx,omod
; Take abs of denominator
        cmp     byte ptr [bp+dhi+1],bh          ; bh = 0
        jge     modpd
        negbp   dlo
modpd:
; Negate mod if numerator < 0
        cmp     byte ptr [bp+nhi+1],bh          ; bh = 0
        jge     udiv
        mov     bx,omodneg
negnum:                                         ; shared with LDIV@
        negbp   nlo
        jmp     udiv
LMOD@   endp
F_LMOD@ endp

;-----------------------------------------------------------------------
; LUMOD@ / F_LUMOD@ -- unsigned remainder.
;-----------------------------------------------------------------------
F_LUMOD@ proc   far
LUMOD@  proc    far
        mov     bx,omod
        jmp     udpush
LUMOD@  endp
F_LUMOD@ endp

;-----------------------------------------------------------------------
; LDIV@ / F_LDIV@ -- signed quotient.
; The quotient is negated iff the operand signs differ.
;-----------------------------------------------------------------------
F_LDIV@ proc    far
LDIV@   proc    far
        push    bp
        mov     bp,sp
        mov     bx,odiv
; Negate quo if num^den < 0
        mov     ax,[bp+nhi]
        xor     ax,[bp+dhi]
        jge     divabs
        mov     bx,odivneg
divabs:
; Take abs of denominator
        cmp     byte ptr [bp+dhi+1],bh          ; bh = 0
        jge     divpd
        negbp   dlo
divpd:
; Take abs of numerator
        cmp     byte ptr [bp+nhi+1],bh          ; bh = 0
        jge     udiv
        jmp     negnum
LDIV@   endp
F_LDIV@ endp

;-----------------------------------------------------------------------
; LUDIV@ / F_LUDIV@ -- unsigned quotient, plus the shared divide core.
;   udpush: entry for callers that have not yet pushed bp (bx = selector)
;   udiv:   entry once the bp frame exists (bx = selector)
;-----------------------------------------------------------------------
F_LUDIV@ proc   far
LUDIV@  proc    far
        mov     bx,odiv
udpush:
        push    bp
        mov     bp,sp
udiv:
        push    bx                              ; odiv, omod, odivneg, omodneg
        mov     ax,[bp+nlo]
        mov     dx,[bp+nhi]
        mov     bx,[bp+dlo]
        mov     cx,[bp+dhi]
; Now we are dividing dx:ax by cx:bx.
; Check to see whether this is really a 32/16 division.
        or      cx,cx
        jnz     div2
; 32/16, check for 16- vs. 32-bit quotient
        cmp     dx,bx
        jae     div1
; 32/16 with 16-bit quotient, just do it.
        div     bx                              ; ax = quo, dx = rem
        pop     bx
        pop     bp
        jmp     cs:xx1[bx]
        even
xx1     dw      offset divx1
        dw      offset modx1
        dw      offset divx1neg
        dw      offset modx1neg
modx1:
        mov     ax,dx
divx1:
        xor     dx,dx
        ret     8
modx1neg:
        mov     ax,dx
divx1neg:
        xor     dx,dx
rneg:                                           ; common "negate and return" exit
        negr
        ret     8
; 32/16 with 32-bit quotient, do in 2 parts.
div1:
        mov     cx,ax                           ; save lo num
        mov     ax,dx
        xor     dx,dx
        div     bx                              ; ax = hi quo
        xchg    cx,ax                           ; save hi quo, get lo num
        div     bx                              ; ax = lo quo, dx = rem
        pop     bx
        pop     bp
        jmp     cs:xx1a[bx]
        even
xx1a    dw      offset divx1a
        dw      offset modx1
        dw      offset divx1aneg
        dw      offset modx1neg
divx1a:
        mov     dx,cx                           ; hi quo
        ret     8
divx1aneg:
        mov     dx,cx
        jmp     rneg
; This is really a 32/32 bit division.
; (Note that the quotient cannot exceed 16 bits.)
; The following algorithm is taken from pp. 235-240 of Knuth, vol. 2
; (first edition).
; Start by normalizing the numerator and denominator.
; Both are shifted right by the same amount until the denominator's high
; word (cx) is zero, leaving a 16-bit divisor in bx.
div2:
        or      ch,ch
        jz      div21                           ; ch == 0, but cl != 0
        ; Do 8 steps all at once.
        mov     bl,bh
        mov     bh,cl
        mov     cl,ch
        xor     ch,ch
        mov     al,ah
        mov     ah,dl
        mov     dl,dh
        xor     dh,dh
        rol     bx,1                            ; faster than jmp
                                                ; (the rcr at div2a undoes it)
div2a:
        rcr     bx,1                            ; finish previous shift
div21:
        shr     dx,1
        rcr     ax,1
        shr     cx,1
        jnz     div2a
        rcr     bx,1
; Now we can do a 32/16 divide.
div2x:
        div     bx                              ; ax = quo, dx = rem
; Multiply by the denominator, and correct the result.
        mov     cx,ax                           ; save quotient
        mul     word ptr [bp+dhi]
        mov     bx,ax                           ; save lo part of hi product
        mov     ax,cx
        mul     word ptr [bp+dlo]
        add     dx,bx
; Now cx = trial quotient, (dx,ax) = cx * denominator.
        not     dx
        neg     ax
        cmc
        adc     dx,0                            ; double-precision neg
        jc      divz                            ; zero quotient
                                                ; requires special handling
        add     ax,[bp+nlo]
        adc     dx,[bp+nhi]
        jc      divx
; Quotient is too large, adjust it.
div3:
        dec     cx
        add     ax,[bp+dlo]
        adc     dx,[bp+dhi]
        jnc     div3
; All done.  (dx,ax) = remainder, cx = lo quotient.
divx:
        pop     bx
        pop     bp
        jmp     cs:xx3[bx]
        even
xx3     dw      offset divx3
        dw      offset modx3
        dw      offset divx3neg
        dw      offset modx3neg
divx3:
        mov     ax,cx
        xor     dx,dx
modx3:
        ret     8
divx3neg:
        mov     ax,cx
        xor     dx,dx
modx3neg:
        jmp     rneg
; Handle zero quotient specially.
; Here (dx,ax) is already 0, and the remainder is the numerator itself.
divz:
        pop     bx
        jmp     cs:xxz[bx]
        even
xxz     dw      offset divxz
        dw      offset modxz
        dw      offset divxz
        dw      offset modxzneg
divxz:
        pop     bp
        ret     8
modxzneg:
        negbp   nlo                             ; undo the earlier abs of the numerator
modxz:
        mov     ax,[bp+nlo]
        mov     dx,[bp+nhi]
        pop     bp
        ret     8
LUDIV@  endp
F_LUDIV@ endp

        endif                                   ; FOR80386


;-----------------------------------------------------------------------
; _memflip8x8
; Transpose an 8x8 bit matrix.  See gsmisc.c for the algorithm in C.
; C-level parameters (far call, arguments on the stack, not popped here):
;   byte *inp      far pointer to the first input byte
;   int line_size  distance in bytes between successive input bytes
;   byte *outp     far pointer to the first output byte
;   int dist       distance in bytes between successive output bytes
; Reads 8 bytes starting at inp, writes 8 bytes starting at outp.
; Preserves ds, si, di.  Alters ax, bx, cx, dx, flags.
;-----------------------------------------------------------------------
        PUBLIC  _memflip8x8
_memflip8x8 proc far
        push    ds
        push    si
        push    di
                ; After pushing, the offsets of the parameters are:
                ; byte *inp=10, int line_size=14, byte *outp=16, int dist=20.
        mov     si,sp
        mov     di,ss:[si+14]                   ; line_size
        lds     si,ss:[si+10]                   ; inp
                ; We assign variables to registers as follows:
                ; ax = AE, bx = BF, cx (or di) = CG, dx = DH.
                ; Load the input data.  Initially we assign
                ; ax = AB, bx = EF, cx (or di) = CD, dx = GH.
        mov     ah,[si]
; Advance to the next input byte and load it into reg.
iload   macro   reg
        add     si,di
        mov     reg,[si]
        endm
        iload   al
        iload   ch
        iload   cl
        iload   bh
        iload   bl
        iload   dh
        iload   dl
; Transposition macro, see C code for explanation.
; Exchanges the bits of reg2 selected by mask with the bits of reg1
; selected by (mask shl shift).  Uses si as scratch.
trans   macro   reg1,reg2,shift,mask
        mov     si,reg1
        shr     si,shift
        xor     si,reg2
        and     si,mask
        xor     reg2,si
        shl     si,shift
        xor     reg1,si
        endm
; Do 4x4 transpositions
        mov     di,cx                           ; we need cl for the shift count
        mov     cl,4
        trans   bx,ax,cl,0f0fh
        trans   dx,di,cl,0f0fh
; Swap B/E, D/G
        xchg    al,bh
        mov     cx,di
        xchg    cl,dh
; Do 2x2 transpositions
        mov     di,cx                           ; need cl again
        mov     cl,2
        trans   di,ax,cl,3333h
        trans   dx,bx,cl,3333h
        mov     cx,di                           ; done shifting >1
; Do 1x1 transpositions
        trans   bx,ax,1,5555h
        trans   dx,cx,1,5555h
; Store result
        mov     si,sp
        mov     di,ss:[si+20]                   ; dist
        lds     si,ss:[si+16]                   ; outp
        mov     [si],ah
; Advance to the next output byte and store reg into it.
istore  macro   reg
        add     si,di
        mov     [si],reg
        endm
        istore  bh
        istore  ch
        istore  dh
        istore  al
        istore  bl
        istore  cl
        istore  dl
; All done
        pop     di
        pop     si
        pop     ds
        ret
_memflip8x8 ENDP

utilasm_TEXT ENDS
        END